import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Read the CSV file that holds all four columns used below: the three
# advertising-channel features and the 'sales' target.
all_data = pd.read_csv('advertising.csv')

# Split the data set. The feature column names are kept in one place so the
# training frame and the frames built for new predictions cannot drift apart.
feature_columns = ['taobao', 'tiktok', 'little red book']
X = all_data[feature_columns]
y = all_data['sales']
# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Choose a linear regression model.
model = LinearRegression()

# Train the model on the training split only.
model.fit(X_train, y_train)

# Evaluate the model.
# Training error: how well the model fits the rows it was trained on.
y_pred_train = model.predict(X_train)
mse_train = mean_squared_error(y_train, y_pred_train)
print(f'Mean Squared Error on Training Data: {mse_train}')

# Held-out error: the test split was never seen during fitting, so this is
# the figure that indicates how the model generalizes. Previously the test
# split was created but never used.
y_pred_test = model.predict(X_test)
mse_test = mean_squared_error(y_test, y_pred_test)
print(f'Mean Squared Error on Test Data: {mse_test}')

# Make predictions for new inputs. Each tuple holds one value per feature,
# in the same order as feature_columns.
new_inputs = [
    (200, 100, 150),
    (300, 150, 200),
]
for feature_values in new_inputs:
    # Build a one-row frame with the training column names so the model
    # receives the features under the names it was fitted with.
    X_new = pd.DataFrame([feature_values], columns=feature_columns)
    y_pred_new = model.predict(X_new)
    print(f'Predicted Output for New Input: {y_pred_new[0]}')